# # -*- coding: utf-8 -*-
# import copy
# import scrapy
# from scrapy import Request
# from scrapy.exceptions import IgnoreRequest
#
# from esgcc_sub.util.login import SeleniumLogin
# from zc_core.util.http_util import retry_request
# from zc_core.model.items import Box
# from esgcc_sub.rules import *
#
#
# class SkuSpider(BaseSpider):
#     name = 'sku'
#     # 常用链接
#     index_url = 'http://b.esgcc.com.cn/sx'
#     catalog_url = 'http://b.esgcc.com.cn/general/commons/wisdom2/main_home_pilot_wrap.htm?marketSite=21008&product_classification_navigation_style=3'
#     # format placeholders, in order: q = catalog id, targetPage = page number
#     sku_list_url = 'http://b.esgcc.com.cn/sortSearch/showSearchPage?q={}&sortType=id&targetPage={}&sortWay=&mSite=MjEwMDg=&prodCatIdForProp=null'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(SkuSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#
#     def start_requests(self):
#         # cookies = SeleniumLogin().get_cookies()
#         cookies = {'JSESSIONID': 'D0BA9F7A4BBB368B29F79EEEC8EC45EE', '__d_s_': '3CCCB516222550FEDAA106C36244C9BE', '__s_f_c_s_': '3CCCB516222550FEDAA106C36244C9BE', '__t_c_k_': '05775724b48e4729aa62cb0c195c0127'}
#         if not cookies:
#             self.logger.error('init cookie failed...')
#             return
#         self.logger.info('init cookie: %s', cookies)
#
#         # 品类、品牌
#         yield Request(
#             url=self.catalog_url,
#             meta={
#                 'reqType': 'catalog',
#                 'batchNo': self.batch_no,
#             },
#             headers={
#                 'Connection': 'keep-alive',
#                 'Cache-Control': 'max-age=0',
#                 'Upgrade-Insecure-Requests': '1',
#                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
#                 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
#                 'Referer': 'http://b.esgcc.com.cn/sx',
#                 'Accept-Encoding': 'gzip, deflate',
#                 'Accept-Language': 'zh-CN,zh;q=0.9',
#             },
#             cookies=cookies,
#             callback=self.parse_catalog,
#             errback=self.error_back,
#             priority=200,
#         )
#
#     # 处理catalog列表
#     def parse_catalog(self, response):
#         # 品类
#         cats = parse_catalog(response)
#         if cats:
#             self.logger.info('品类: count[%s]' % len(cats))
#             yield Box('catalog', self.batch_no, cats)
#
#             for cat in cats:
#                 # 请求sku
#                 level = cat.get('level')
#                 if level == 3:
#                     page = 1
#                     cat_id = cat.get('catalogId')
#                     yield Request(
#                         url=self.sku_list_url.format(cat_id, page),
#                         callback=self.parse_sku_page,
#                         errback=self.error_back,
#                         cookies=copy.deepcopy(response.request.cookies),
#                         meta={
#                             'reqType': 'sku',
#                             'batchNo': self.batch_no,
#                             'catId': cat_id,
#                             'page': page
#                         },
#                         headers={
#                             'Connection': 'keep-alive',
#                             'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
#                             'X-Requested-With': 'XMLHttpRequest',
#                             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
#                             'Referer': 'http://b.esgcc.com.cn/sx',
#                             'Accept-Encoding': 'gzip, deflate',
#                             'Accept-Language': 'zh-CN,zh;q=0.9',
#                         },
#                         priority=100,
#                         dont_filter=True
#                     )
#
#     # Handle the first SKU list response of a category: read the total page count and issue one request per page
#     def parse_sku_page(self, response):
#         meta = response.meta
#         cat_id = meta.get('catId')
#         # 处理品类列表
#         pages = parse_sku_page(response)
#         if pages:
#             self.logger.info('总页数: cat=%s, total=%s' % (cat_id, pages))
#             if pages > 500:
#                 self.logger.info('分页超限: cat=%s, total=%s' % (cat_id, pages))
#             for page in range(1, pages + 1):
#                 yield Request(
#                     url=self.sku_list_url.format(cat_id, page),
#                     callback=self.parse_sku,
#                     errback=self.error_back,
#                     cookies=copy.deepcopy(response.request.cookies),
#                     meta={
#                         'reqType': 'sku',
#                         'batchNo': self.batch_no,
#                         'catId': cat_id,
#                         'page': page
#                     },
#                     headers={
#                         'Connection': 'keep-alive',
#                         'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
#                         'X-Requested-With': 'XMLHttpRequest',
#                         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
#                         'Referer': 'http://b.esgcc.com.cn/sx',
#                         'Accept-Encoding': 'gzip, deflate',
#                         'Accept-Language': 'zh-CN,zh;q=0.9',
#                     },
#                     priority=100,
#                     dont_filter=True
#                 )
#         else:
#             self.logger.info('无分页: cat=%s, total=%s' % (cat_id, pages))
#
#     # 处理sku列表
#     def parse_sku(self, response):
#         meta = response.meta
#         page = meta.get('page')
#         cat_id = meta.get('catId')
#
#         sku_list = parse_sku(response)
#         if sku_list:
#             self.logger.info('清单: sp=%s, page=%s, count=%s' % (cat_id, page, len(sku_list)))
#             yield Box('sku', self.batch_no, sku_list)
#         else:
#             self.logger.info('分页完成: sp=%s, page=%s' % (cat_id, page))
#
#     # 错误处理
#     def error_back(self, e):
#         self.logger.error(e)
#         if e.type and e.type == IgnoreRequest:
#             self.logger.info(e.value)
#         else:
#             if e.request:
#                 self.logger.error('请求异常: [%s][%s] -> [%s]' % (str(type(e)), e.request.url, e.request.meta))
#                 yield retry_request(e.request)
#             else:
#                 self.logger.error('未知异常: %s' % e)
